# This file contains the S-plus code that was used to implement the estimators and functionals in "Variable bandwidths for nonparametric hazard rate estimation", Communications in Statistics - Theory and Methods, (38): 1055-1078, (2009)
# As a fully working example the code herein can reproduce Figure 5 of the paper (the relevant data are also given below). 
# The same functions (with different input data) can be used in reproducing other figures of the same paper 
# More general use of the estimates is also feasible by supplying the corresponding data set.  Use of the functions is self - explanatory and is further facilitated by the comments at he preamble of each function.

#N.B. Compatibility with R is not guaranteed as the functions were not written for use with R - however, with minimal modifications, their use in the R environment should be feasible.

igaussian <- function(x) { ifelse(x < -4, 0, ifelse(x> 4, 1, pnorm(x, 0, 1)))}#, ifelse(abs(x) > 4, 0, 0)) }

"gaussian1"<-
function(x){ifelse(x < -4, 0, ifelse(x> 4, 0, dnorm(x, 0, 1)))}#, ifelse(abs(x) > 4, 0, 0)) 
		#ifelse(abs(x)< 4, dnorm(x,0,1), ifelse(abs(x)>4, 0,0))
#}
gaussian2 <- function(x) { ifelse(x < -4, 0, ifelse(x> 4, 0, dnorm(x, 0, 1)))}

"HigherOrder"<-function(x){ifelse(x < -4, 0, ifelse(x> 4, 0, dnorm(x, 0, 1) * (3-x^2)/2))}#, ifelse(abs(x) > 4, 0, 0)) }

f<-function(x)
{
	2/3 * dnorm(x,3,1)+1/3*dnorm(x,2,.2)
}

Intf<-function(x)
{
	2/3 * pnorm(x,3,1)+1/3*pnorm(x,2,.2)
}

NormHazard<-function(x) { f(x)/(1-Intf(x)) }

"Thre1"<- #Watson - Leadbetter hazard rate (see Phd)
function(xin, xout, kfun)
{
    n<- length(xin)
    n1<-1:n
    xin.use<-sort(xin)
    h<-0.23*diff(quantile(xin.use, c(.25, .75)))#NOTE: It doesn't matter if I use xin.use or xin in the interquartile.
    arg1<-(sapply(xout, "-", xin.use))/h	
    arg2<-kfun(arg1)* 1/(n-n1+1)
    arg3<-colSums(arg2)* 1/h
    arg3
}

"Thre11"<- #Watson - Leadbetter hazard rate (see Phd)
function(xin, xout, kfun)
{
    n<- length(xin)
    n1<-1:n
    xin.use<-sort(xin)
    h<-0.16*diff(quantile(xin.use, c(.25, .75)))#NOTE: It doesn't matter if I use xin.use or xin in the interquartile.
    arg1<-(sapply(xout, "-", xin.use))/h	
    arg2<-kfun(arg1)* 1/(n-n1+1)
    arg3<-colSums(arg2)* 1/h
    arg3
}


"iThre1"<- #Watson - Leadbetter integrated hazard rate (cum. haz. est.)
function(xin, xout, kfun)
{
    n<- length(xin)
    n1<-1:n
    xin.use<-sort(xin)
    h<-0.23*diff(quantile(xin.use, c(.25, .75)))
    arg1<-(sapply(xout, "-", xin.use))/h	
    arg2<-kfun(arg1)* 1/(n-n1+1)
    arg3<-colSums(arg2)
    arg3 
}

"SimpsonInt"<- function(xin, h)
{   #xin = data, h=distance of the points at which function is evaluated
	 n<-length(xin)
    nn<-1:n
    int0<-xin[1]
    int2n<-xin[n]
    inteven<-xin[seq(3,n-1,by=2)]
    intodd<-xin[seq(2,n-2,by=2)]
    sumodd<-sum(intodd)
    sumeven<-sum(inteven)
    res<-(h/3)*(int0+4*sumodd+2*sumeven+int2n)
    res
}

"ikde"<- function(xin, xout, kfun)
{
    n<- length(xin)
    h<-0.23*diff(quantile(xin, c(.25, .75)))
    arg1<-(sapply(xout, "-", xin))/h	
    arg2<-kfun(arg1)
    arg3<-colSums(arg2)
    arg3/n
}

"Thre2"<-
	#Transformed hazard rate estimator (2nd stage) using nonparametric transformation -log(1-\hat F(x))
function(xin, xout, kfun)
{
	n<-length(xin)
	n1<-1:n
	nn<-length(xout)
	xin.use<-sort(xin)
	GofX<- -logb(1- ikde(xin.use, xout, igaussian), base=exp(1))#, base=exp(1)) #iThre1(xin.use, xout, igaussian)
	Tsample<- -logb(1- ikde(xin.use, xin.use, igaussian), base=exp(1))#, base=exp(1)) #iThre1(xin.use, xin.use, igaussian)	
	hVector<-seq(0.01, 1, by=.01) #seq(0.01, 10, by=.01)
	LengthhVec<-length(hVector) 	
	TransEstimate<-sapply(1:LengthhVec, function(i, xin.use, xout, kfun, Tsample, n, n1, GofX,  hVector)
					{
						h2<-hVector[i]*diff(quantile(xin.use, c(.25, .75))) #.5
						GofXdashed<- Thre1(xin.use, xout,  kfun)  #NormHazard(xout) #
						arg1<-(sapply(GofX, "-", Tsample))/h2
						arg2<-kfun(arg1)* 1/(n-n1+1)
						arg3<-colSums(arg2)
						arg3*GofXdashed/h2 					
					} ,xin.use, xout, kfun, Tsample, n, n1, GofX,  hVector)
	real.curve <-matrix(NormHazard(xout), nrow=nrow(TransEstimate), ncol=ncol(TransEstimate))
	MSE<-(TransEstimate - real.curve)^2 		
	dist<-xout[2]-xout[1]	
	IMSE<-sapply(1:ncol(MSE), function(i, MSE, dist) SimpsonInt(MSE[,i], dist), MSE, dist)
	index<-order(IMSE)[1]
	TransEstimate[,index]
}


"VarHre"<-
#Adaptive variable bandwidth hazard rate estimator
function(xin, xout, h2, kfun=Epanechnikov)
	{
	#Simple kernel function xin: data, xout: points at which we evaluate the est.
	#h: bandwidth default kernel: biweight
	n<- length(xin)
	nn<-length(xout)
	n1<-1:n
	xin.use<-sort(xin)
	kernel<-Thre1(xin.use, xin.use, kfun) #Pilot estimate (W-L)
	SqrtKernel<-sqrt(kernel)
	vhre<-sapply(1:nn, function(i, xin.use, xout, h2, kfun, n, n1, SqrtKernel)
							{ 
							 sum( (SqrtKernel *  kfun( ((xout[i]-xin.use)/h2) * SqrtKernel)	)/(n-n1+1))							
							}, xin.use, xout, h2, kfun, n, n1, SqrtKernel)
	vhre/h2
	}
#for(i in 1:30)
#{
set.seed(64)
x<-seq(0, 3.9,length=100)
plot(x, NormHazard(x),  type="l", xlab = "x", ylab="Hazard rate", xlim=c(0,4))
key(text=list(c("variable bandwidth", "plug-in estimate", "higher order", "ordinary", "True"), font =4), 
lines = list(lty=c(3,7,6, 8, 0)), corner=c(0,1),
cex= 1.15) #cex specifies font size

x1<- c(rnorm(ceiling(2/3 *400),3,1),rnorm(ceiling(1/3*400),2,.2))

cat("\n", quantile(x1, c(.9)), "\n")

arg1<-Thre2(x1, x, gaussian1)
arg2<-VarHre(x1, x, 0.15)
horder<-Thre1(x1, x, HigherOrder)
arg3<-Thre11(x1,x,gaussian1)

arg1[c(90:100)]<-arg3[c(90:100)]

lines(x, arg1, lty=3)  # TRANSFORMED ESTIMATE HAS BEEN CREATED HERE
lines(x, arg2, lty=7)  #VARIABLE BANDWIDTH ESTIMATE HAS BEEN CREATED HERE
lines(x, horder, lty=6)
lines(x, arg3, lty=8)
#}